library(tibble)
library(tidyr)
library(readr)
library(stringr)
library(ggplot2)
library(dplyr)
library(cowplot)
library(uwot)
library(Rphenograph)
library(ComplexHeatmap)
theme_set(theme_cowplot())
# if(!require(devtools)){
# install.packages("devtools") # If not already installed
# }
# devtools::install_github("JinmiaoChenLab/Rphenograph")
data_norm_sub <- read.csv("~/Documents/INsTRuCT_workshop/data_norm_sub5.csv")
colnames(data_norm_sub)
data_norm_sub %>% count(Run,id,Individuals,Group,Severity,Disease.phase,max..WHO.scale,sev_merge,Days.post.symptom.onset,Week, sev_week,followup)
panel <- colnames(data_norm_sub)[15:54]
data_norm_sub %>%
dplyr::filter(CD3>0, CD45>0) %>%
sample_frac(0.1) %>%
mutate_at(vars(panel),asinh) %>%
dplyr::filter(CD45>0, CD3>0) %>%
ggplot(aes(x=CD45, y=CD3)) +
geom_point(size = 0.01, alpha = 0.1) +
geom_density_2d()+
geom_rect(mapping=aes(xmin=1, xmax=8, ymin=4.3, ymax=8), color="black", alpha=0)
data_norm_sub %>%
mutate_at(vars(panel),asinh) %>%
dplyr::filter(CD3>4.3,
CD45>1,
CD15>0,CD19>0) %>%
ggplot(aes(x=CD19, y=CD15)) +
geom_point(size = 0.01, alpha = 0.1) +
geom_density_2d()+
geom_rect(mapping=aes(xmin=0, xmax=2.9, ymin=0, ymax=4), color="black", alpha=0)
data_norm_sub <- data_norm_sub %>% mutate(Tcell = ifelse(CD3>sinh(4.3) & CD45>sinh(1) & CD15<sinh(4) & CD19<sinh(2.9), TRUE, FALSE))
color_severity <- c(
"healthy" = "#0449FF",
"FLI" = "#807F7F",
"HIV" = "#40007F",
"HBV" = "magenta",
"mild/moderate" = "#FFB651",
"severe/critical" = "#F82000")
data_norm_sub <- data_norm_sub %>% mutate(sev_merge = factor(sev_merge,levels = c("healthy","FLI","HIV","HBV","mild/moderate","severe/critical")))
data_norm_sub %>%
count(id,Tcell,sev_merge,Disease.phase) %>%
group_by(id) %>%
mutate(perc = n/sum(n)*100) %>%
ungroup() %>%
dplyr::filter(Tcell) %>%
ggplot(aes(y = perc, x = sev_merge, fill = sev_merge)) +
geom_boxplot(position=position_dodge(1), alpha = 0.7)+
geom_dotplot(binaxis='y', stackdir='center',
position=position_dodge(1), alpha = 0.7)+
facet_grid(~ Disease.phase, space = "free_x", scale = "free_x") +
scale_fill_manual(values = color_severity)+
ylab("Percentage of T cells (%)") +
xlab("")+
theme(axis.text.x=element_blank(),
axis.ticks.x=element_blank())
gates <- tibble(
Run = as.numeric(str_sort(unique(data_norm_sub$Run))),
CD8threshold = c(3.9,4.15,3.7,3.7,4.3,4.1,3.8,4.4,5.1,4.85,4.55,4.55,4,5,4.2),
TCRgdthreshold = c(2.85,2.85,2.85,2.85,2.85,2.85,2.85,2.85,2.85,2.85,2.85,2.85,2.85,2.85,3.7))
data_Tcell <- data_norm_sub %>% dplyr::filter(Tcell)
data_Tcell <- data_Tcell %>%
left_join(gates) %>%
mutate(Tcellcompartment = case_when( asinh(TCRgd) <= TCRgdthreshold & asinh(CD8) <= CD8threshold ~ "CD4+",
asinh(TCRgd) <= TCRgdthreshold & asinh(CD8) > CD8threshold ~ "CD8+",
TRUE ~ "TCRgd+"))
data_Tcell %>%
mutate_at(vars(panel),asinh) %>%
ggplot(aes(x=CD8, y=TCRgd)) +
geom_point(aes(color = Tcellcompartment),size = 0.5, alpha = 0.5) +
guides(colour = guide_legend(override.aes = list(size=5)))
data_Tcell %>%
count(Tcellcompartment) %>%
mutate(perc = n/sum(n)*100) %>%
ggplot(aes(x = Tcellcompartment, y = perc, fill = Tcellcompartment, label = round(perc,2) )) +
geom_col(position = "dodge") +
geom_label() +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1), legend.position = "none") +
ylab("Percentage of each T cell compartment from all T cells (%)") +
xlab("")
sel_markers <- panel[!panel %in% c("CD45","CD3", "CD19","CD15","CD8","TCRgd","IgD","IgM","CD21","CD14")]
UMAP_TCRgd <- data_Tcell %>%
mutate_at(vars(sel_markers), asinh) %>%
mutate_at(vars(sel_markers), scale) %>%
dplyr::filter(Tcellcompartment == "TCRgd+") %>%
dplyr::select(sel_markers) %>%
uwot::umap(n_neighbors = 30,spread = 1, min_dist = 0.5,metric = "euclidean", verbose = TRUE, fast_sgd = TRUE)
UMAP_CD4 <- data_Tcell %>%
mutate_at(vars(sel_markers), asinh) %>%
mutate_at(vars(sel_markers), scale) %>%
dplyr::filter(Tcellcompartment == "CD4+") %>%
dplyr::select(sel_markers) %>%
uwot::umap(n_neighbors = 30,spread = 1,min_dist = 0.5,metric = "euclidean", verbose = TRUE, fast_sgd = TRUE)
UMAP_CD8 <- data_Tcell %>%
mutate_at(vars(sel_markers), asinh) %>%
mutate_at(vars(sel_markers), scale) %>%
dplyr::filter(Tcellcompartment == "CD8+") %>%
dplyr::select(sel_markers) %>%
uwot::umap(n_neighbors = 30,spread = 1,min_dist = 0.5,metric = "euclidean", verbose = TRUE, fast_sgd = TRUE)
#### Add UMAP information to data frame
data_Tcell$UMAP1 <- NA
data_Tcell$UMAP2 <- NA
data_Tcell$UMAP1[data_Tcell$Tcellcompartment == "CD4+"] <- UMAP_CD4[,1]
data_Tcell$UMAP1[data_Tcell$Tcellcompartment == "CD8+"] <- UMAP_CD8[,1]
data_Tcell$UMAP1[data_Tcell$Tcellcompartment == "TCRgd+"] <- UMAP_TCRgd[,1]
data_Tcell$UMAP2[data_Tcell$Tcellcompartment == "CD4+"] <- UMAP_CD4[,2]
data_Tcell$UMAP2[data_Tcell$Tcellcompartment == "CD8+"] <- UMAP_CD8[,2]
data_Tcell$UMAP2[data_Tcell$Tcellcompartment == "TCRgd+"] <- UMAP_TCRgd[,2]
p1 <- data_Tcell %>%
dplyr::filter(Tcellcompartment == "CD4+") %>%
sample_n(30000) %>%
ggplot(aes(x = UMAP1, y=UMAP2, color = sev_merge)) +
geom_point(alpha = 0.5,size = 2)+
guides(colour = guide_legend(ncol = 1,override.aes = list(size=3, alpha = 1))) +
scale_color_manual(values = color_severity, name = "")+
theme_classic() +
ggtitle("CD4+") +
theme(legend.position = "none",plot.title = element_text(hjust = 0.5))
p2 <- data_Tcell %>%
dplyr::filter(Tcellcompartment == "CD8+") %>%
sample_n(30000) %>%
ggplot(aes(x = UMAP1, y=UMAP2, color = sev_merge)) +
geom_point(alpha = 0.5,size = 2)+
guides(colour = guide_legend(ncol = 1,override.aes = list(size=3, alpha = 1))) +
scale_color_manual(values = color_severity, name = "")+
theme_classic() +
ggtitle("CD8+") +
theme(legend.position = "none",plot.title = element_text(hjust = 0.5))
p3 <- data_Tcell %>%
dplyr::filter(Tcellcompartment == "TCRgd+") %>%
ggplot(aes(x = UMAP1, y=UMAP2, color = sev_merge)) +
geom_point(alpha = 0.5,size = 3)+
guides(colour = guide_legend(ncol = 1,override.aes = list(size=3, alpha = 1))) +
scale_color_manual(values = color_severity, name = "")+
theme_classic() +
ggtitle("TCRgd+") +
theme(legend.position = "right", legend.direction = "vertical",plot.title = element_text(hjust = 0.5))
plot_grid(p1,p2,p3,nrow = 1)
p1 <- data_Tcell %>%
dplyr::filter(Tcellcompartment == "CD4+", Group == "CV19") %>%
ggplot(aes(x = UMAP1, y=UMAP2, color = Disease.phase)) +
geom_point(alpha = 0.5,size = 2)+
guides(colour = guide_legend(ncol = 1,override.aes = list(size=3, alpha = 1))) +
scale_color_manual(values = c("acute" = "red","convalescent"="black"), name = "")+
theme_classic() +
ggtitle("CD4+") +
theme(legend.position = "none",plot.title = element_text(hjust = 0.5))
p2 <- data_Tcell %>%
dplyr::filter(Tcellcompartment == "CD8+", Group == "CV19") %>%
ggplot(aes(x = UMAP1, y=UMAP2, color = Disease.phase)) +
geom_point(alpha = 0.5,size = 2)+
guides(colour = guide_legend(ncol = 1,override.aes = list(size=3, alpha = 1))) +
scale_color_manual(values = c("acute" = "red","convalescent"="black"), name = "")+
theme_classic() +
ggtitle("CD8+") +
theme(legend.position = "none",plot.title = element_text(hjust = 0.5))
p3 <- data_Tcell %>%
dplyr::filter(Tcellcompartment == "TCRgd+", Group == "CV19") %>%
ggplot(aes(x = UMAP1, y=UMAP2, color = Disease.phase)) +
geom_point(alpha = 0.5,size = 3)+
guides(colour = guide_legend(ncol = 1,override.aes = list(size=3, alpha = 1))) +
scale_color_manual(values = c("acute" = "red","convalescent"="black"), name = "")+
theme_classic() +
ggtitle("TCRgd+") +
theme(legend.position = "none",plot.title = element_text(hjust = 0.5))
plot_grid(p1,p2,p3,nrow = 1)
p1 <- data_Tcell %>%
mutate_at(vars(sel_markers), asinh) %>%
mutate_at(vars(sel_markers), scale) %>%
dplyr::filter(Tcellcompartment == "CD4+") %>%
sample_n(30000) %>%
ggplot(aes(x = UMAP1, y=UMAP2, color = HLADR)) +
geom_point(alpha = 0.5,size = 2)+
theme_classic() +
ggtitle("CD4+") +
theme(legend.position = "none",plot.title = element_text(hjust = 0.5)) +
scale_color_gradient2(low = "blue", mid = "grey", high = "red", midpoint = 0)
p2 <- data_Tcell %>%
mutate_at(vars(sel_markers), asinh) %>%
mutate_at(vars(sel_markers), scale) %>%
dplyr::filter(Tcellcompartment == "CD8+") %>%
sample_n(30000) %>%
ggplot(aes(x = UMAP1, y=UMAP2, color = HLADR)) +
geom_point(alpha = 0.5,size = 2)+
theme_classic() +
ggtitle("CD8+") +
theme(legend.position = "none",plot.title = element_text(hjust = 0.5)) +
scale_color_gradient2(low = "blue", mid = "grey", high = "red", midpoint = 0)
p3 <- data_Tcell %>%
mutate_at(vars(sel_markers), asinh) %>%
mutate_at(vars(sel_markers), scale) %>%
dplyr::filter(Tcellcompartment == "TCRgd+") %>%
ggplot(aes(x = UMAP1, y=UMAP2, color = HLADR)) +
geom_point(alpha = 0.5,size = 3)+
theme_classic() +
ggtitle("TCRgd+") +
theme(plot.title = element_text(hjust = 0.5)) +
scale_color_gradient2(low = "blue", mid = "grey", high = "red", midpoint = 0)
plot_grid(p1,p2,p3,nrow = 1)
clust_cd8 <- data_Tcell %>%
mutate_at(vars(sel_markers), asinh) %>%
mutate_at(vars(sel_markers), scale) %>%
dplyr::filter(Tcellcompartment == "CD8+",Disease.phase == "acute") %>%
dplyr::select(sel_markers) %>%
Rphenograph(k = 30)
clust_ids <- data_Tcell %>%
dplyr::filter(Tcellcompartment == "CD8+",Disease.phase == "acute") %>%
pull(cellid)
clust_cd8 <- tibble(cellid = clust_ids, Rpheno = as.character(membership(clust_cd8[[2]])))
data_Tcell <- data_Tcell %>% left_join(clust_cd8)
data_Tcell <- data_Tcell %>% mutate(Rpheno = factor(Rpheno, levels = str_sort(unique(Rpheno), numeric = TRUE)))
data_Tcell %>% dplyr::filter(Tcellcompartment == "CD8+",data_Tcell$Disease.phase == "acute" ) %>%
count(Rpheno) %>%
ggplot(aes(x = Rpheno, y = n, label = n))+
geom_col(position = "dodge") + geom_label()
data_Tcell %>%
dplyr::filter(Tcellcompartment == "CD8+",data_Tcell$Disease.phase == "acute") %>%
ggplot(aes(x = UMAP1, y=UMAP2, color = Rpheno)) +
geom_point(alpha = 0.5,size = 1)+
guides(colour = guide_legend(ncol = 1,override.aes = list(size=3, alpha = 1))) +
theme_classic() +
ggtitle("CD8+") +
theme(plot.title = element_text(hjust = 0.5))
data_Tcell %>%
mutate_at(vars(sel_markers), asinh) %>%
mutate_at(vars(sel_markers), scale) %>%
dplyr::filter(Tcellcompartment == "CD8+" & Disease.phase == "acute") %>%
dplyr::select(sel_markers,Rpheno) %>%
group_by(Rpheno) %>%
summarise_at(vars(sel_markers), funs(mean(., na.rm=TRUE))) %>%
pivot_longer(names_to = "markers", values_to = "avg_zscore", - Rpheno) %>%
mutate(markers = factor(markers,levels = sel_markers)) %>%
ggplot(aes(x = Rpheno, y = markers, fill = avg_zscore)) +
geom_tile() +
scale_fill_gradient2(low = "blue", mid = "white", high = "red", limits = c(-4,4))
markers_category <- c(rep("Differentiation",2),
rep("Co-stimulation",4),
rep("Co-inhibition",4),
rep("Activation",7),
rep("Chemokine receptor",4),
rep("Others",9))
markers_category <- factor(markers_category,
levels=c("Differentiation","Co-stimulation","Co-inhibition","Activation","Chemokine receptor","Others"))
data_heatmap <- data_Tcell %>%
mutate_at(vars(sel_markers), asinh) %>%
mutate_at(vars(sel_markers), scale) %>%
dplyr::filter(Tcellcompartment == "CD8+" & Disease.phase == "acute") %>%
dplyr::select(sel_markers,Rpheno) %>%
group_by(Rpheno) %>%
summarise_at(vars(sel_markers), funs(mean(., na.rm=TRUE))) %>%
column_to_rownames("Rpheno")
data_heatmap %>% t() %>%
Heatmap(name = "z-score avg",
cluster_rows = FALSE,
cluster_columns = TRUE,
rect_gp = gpar(col = "white", lwd = 2),
column_dend_height = unit(4, "cm"),
column_names_rot = 45,
row_split = markers_category,
row_title_rot = 0
)
For the non-weekly analysis, we considered the first sample per patient when multiple samples were available.
selected_ids <- data_Tcell %>%
dplyr::filter(Disease.phase == "acute") %>%
count(Individuals,id,Group,sev_merge,Days.post.symptom.onset) %>%
dplyr::select(-n) %>%
group_by(Individuals) %>%
mutate(n_samples = 1:n()) %>%
mutate(select_id = ifelse(n_samples == 1, TRUE,
ifelse(min(as.numeric(Days.post.symptom.onset)) == as.numeric(Days.post.symptom.onset),
TRUE,
FALSE))) %>%
dplyr::filter(select_id) %>%
pull(id)
data_Tcell %>%
dplyr::filter(id %in% selected_ids, Tcellcompartment == "CD8+") %>%
dplyr::count(id,sev_merge,Tcellcompartment,Rpheno) %>%
group_by(id,Tcellcompartment) %>%
mutate(perc = n/sum(n)*100) %>%
ungroup() %>%
# When using the function count(), if a cluster is absent in a donor, it will not be counted as zero.
# So we complete the count table by filling the missing clusters with a 0.
tidyr::complete(id,Rpheno,fill = list(n=0,perc = 0)) %>%
ungroup() %>%
group_by(Rpheno) %>%
fill(Tcellcompartment, .direction = "downup") %>%
ungroup() %>%
group_by(id) %>%
fill(sev_merge, .direction = "downup") %>%
ungroup() %>%
mutate(perc = ifelse(is.na(perc),0,perc)) %>%
dplyr::filter(!is.na(Tcellcompartment)) %>%
ggplot(aes(x = sev_merge, y = perc, fill = sev_merge)) +
geom_boxplot() +
geom_jitter()+
facet_wrap(~Rpheno, scale = "free") +
scale_fill_manual(values = color_severity) +
theme(axis.text.x = element_blank())